library(tidyverse) # for data cleaning and plotting
library(gardenR) # for Lisa's garden data
library(lubridate) # for date manipulation
library(openintro) # for the abbr2state() function
library(palmerpenguins)# for Palmer penguin data
library(maps) # for map data
library(ggmap) # for mapping points on maps
library(gplots) # for col2hex() function
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
library(leaflet) # for highly customizable mapping
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
library(gifski) # need the library for creating gifs but don't need to load each time
library(shiny) # for creating interactive apps
library(ggimage)
theme_set(theme_minimal())
# SNCF Train data
small_trains <- read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv")
# Lisa's garden data
data("garden_harvest")
# Lisa's Mallorca cycling data
mallorca_bike_day7 <- read_csv("https://www.dropbox.com/s/zc6jan4ltmjtvy0/mallorca_bike_day7.csv?dl=1") %>%
select(1:4, speed)
# Heather Lendway's Ironman 70.3 Pan Am championships Panama data
panama_swim <- read_csv("https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_swim_20160131.csv")
panama_bike <- read_csv("https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_bike_20160131.csv")
panama_run <- read_csv("https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_run_20160131.csv")
#COVID-19 data from the New York Times
covid19 <- read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv")
ggplotly() function.plastics <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2021/2021-01-26/plastics.csv')
plastics_long <- plastics %>%
pivot_longer(cols = empty:grand_total,
names_to = "plastic_type",
values_to = "count")
interactive_plastic <- plastics %>%
filter(parent_company == "Grand Total",
grand_total > 5000, country != empty) %>%
select(country, grand_total, parent_company, year, volunteers) %>%
ggplot() +
geom_point(aes(x = grand_total, y = country, color = volunteers)) +
labs(title = "Countries that produced the most plastic waste ( > 5000)",
x = "Amount of plastic waste", y = "Country", caption = "Jennifer Huang")
ggplotly(interactive_plastic)
\(~\)
small_trains dataset that contains data from the SNCF (National Society of French Railways). These are Tidy Tuesday data! Read more about it here.small_trains %>%
filter(service != "NA") %>%
mutate(month = month(month, label = TRUE, abbr = TRUE)) %>%
ggplot(aes(x = journey_time_avg,
y = num_arriving_late,
colour = service,
group = month)) +
geom_point(size = 1L) +
labs(title = "Monthly late train arrivals by service type",
subtitle = "Month: {closest_state}",
x = "Average journey time",
y = "Number of trains arriving late",
color = "Train service type") +
scale_color_viridis_d() +
theme_minimal() +
transition_states(month,
transition_length = 2,
state_length = 1)
anim_save("monthly_late_arrival.gif")
knitr::include_graphics("monthly_late_arrival.gif")
\(~\)
geom_area() examples here. You will look at cumulative harvest of tomato varieties over time. You should do the following:garden_harvest data, filter the data to the tomatoes and find the daily harvest in pounds for each variety.fct_reorder()) from most to least harvested (most on the bottom).I have started the code for you below. The complete() function creates a row for all unique date/variety combinations. If a variety is not harvested on one of the harvest dates in the dataset, it is filled with a value of 0.
tomato_variety_cum_harvest <- garden_harvest %>%
filter(vegetable == "tomatoes") %>%
group_by(date, variety) %>%
summarize(daily_harvest_lb = sum(weight)*0.00220462) %>%
ungroup() %>%
complete(variety, date, fill = list(daily_harvest_lb = 0)) %>%
group_by(variety) %>%
mutate(variety = str_to_title(variety),
cum_harvest_variety = cumsum(daily_harvest_lb)) %>%
ggplot(aes(x = date,
y = cum_harvest_variety)) +
geom_area(aes(fill = fct_reorder(variety, cum_harvest_variety, max))) +
labs(title = "Cumulative harvest (in pounds) for each tomatoe variety",
subtitle = "Variety: {frame_along}",
x = "",
y = "Harvest (lbs)",
fill = "Tomato variety") +
scale_fill_viridis_d() +
transition_reveal(date)
animate(tomato_variety_cum_harvest, duration = 20)
anim_save("tomatoe_variety_harvest.gif")
knitr::include_graphics("tomatoe_variety_harvest.gif")
\(~\)
mallorca_bike_day7 bike ride using animation! Requirements:ggmap.ggimage package and geom_image to add a bike image instead of a red point. You can use this image. See here for an example.mallorca_map <- get_stamenmap(
bbox = c(left = 2.2637, bottom = 39.5137, right = 2.6209, top = 39.7006),
maptype = "terrain",
zoom = 12)
mallorca_bike_day7 <- mallorca_bike_day7 %>%
mutate(bike = "https://raw.githubusercontent.com/llendway/animation_and_interactivity/master/bike.png")
ggmap(mallorca_map) +
geom_path(data = mallorca_bike_day7,
aes(x = lon, y = lat, color = ele),
size = 1.2) +
geom_image(data = mallorca_bike_day7,
aes(x = lon,
y = lat,
image = bike),
size = 0.1) +
annotate("point", x = 2.586255, y = 39.66033, color = "red") +
annotate("text", x = 2.5705, y = 39.650,
color = "purple",
label =
"Starting and
Ending City: Palma") +
labs(title = "Lisa's Mallorca, Spain Biking Journey",
subtitle = "Time: {frame_along}",
x = "",
y = "") +
scale_fill_viridis_d() +
transition_reveal(time) +
scale_color_viridis_c(option = "magma") +
theme_map() +
theme(legend.background = element_blank())
anim_save("lisa_bike_journey.gif")
knitr::include_graphics("lisa_bike_journey.gif")
\(~\)
panama_swim, panama_bike, and panama_run. Create a similar map to the one you created with my cycling data. You will need to make some small changes: 1. combine the files (HINT: bind_rows(), 2. make the leading dot a different color depending on the event (for an extra challenge, make it a different image using `geom_image()!).combined_panama <-bind_rows(panama_swim, panama_bike, panama_run)
panama_map <- get_stamenmap(
bbox = c(left = -79.5762, bottom = 8.9060, right = -79.4878, top = 8.9909),
maptype = "terrain",
zoom = 14)
combined_panama <- combined_panama %>%
mutate(event_icons = case_when(
event == "Bike" ~ "https://raw.githubusercontent.com/llendway/animation_and_interactivity/master/bike.png",
event == "Swim" ~ "https://img.icons8.com/android/24/000000/swimming.png",
event == "Run" ~ "https://img.icons8.com/material-sharp/24/000000/running.png"))
ggmap(panama_map) +
geom_path(data = combined_panama,
aes(x = lon, y = lat, color = event),
size = 1.2) +
geom_image(data = combined_panama,
aes(x = lon,
y = lat,
image = event_icons),
size = 0.1) +
labs(title = "Heather's Panama Triatholon Journey",
subtitle = "Time: {frame_along}",
x = "",
y = "") +
transition_reveal(time) +
scale_color_viridis_d(option = "magma") +
theme_map() +
theme(legend.background = element_blank())
anim_save("triatholon_heather.gif")
knitr::include_graphics("triatholon_heather.gif")
\(~\)
lag() function you’ve used in a previous set of exercises). Replace missing values with 0’s using replace_na().geom_path() and add a group aesthetic. Put the x and y axis on the log scale and make the tick labels look nice - scales::comma is one option. This plot will look pretty ugly as is.geom_point()) and add the state name as a label (geom_text() - you should look at the check_overlap argument).animate() function to have 200 frames in your animation and make it 30 seconds long.covid_state_newcases <- covid19 %>%
group_by(state) %>%
mutate(past_wk = lag(cases, 7, order_by = date)) %>%
replace_na(list(past_wk = 0)) %>%
ungroup() %>%
filter(past_wk >= 20) %>%
mutate(new_case_past_wk = cases - past_wk,
ordered_state = fct_reorder2(state, date, cases)) %>%
ggplot(aes(x = cases, y = new_case_past_wk, group = ordered_state)) +
geom_path(color = "blue",
size = 0.5) +
geom_point(color = "purple",
size = 0.7) +
geom_text(aes(label = ordered_state),
check_overlap = TRUE) +
scale_y_log10(breaks =
scales::trans_breaks(
"log10", function(x)10^x),
labels = scales::comma) +
scale_x_log10(breaks =
scales::trans_breaks(
"log10", function(x)10^x),
labels = scales::comma) +
transition_reveal(date) +
labs(title = "Date: {frame_along}",
x = "Total cases",
y = "New cases in the past week") +
theme(legend.background = element_blank())
animate(covid_state_newcases,
nframes = 200,
duration = 30)
anim_save("covid_state_newcases.gif")
knitr::include_graphics("covid_state_newcases.gif")
\(~\)
states_map data. Here is a list of details you should include in the plot:wday() to create a day of week variable and filter to all the Fridays.animate() function to make the animation 200 frames instead of the default 100 and to pause for 10 frames on the end frame.group = date in aes().census_pop_est_2018 <- read_csv("https://www.dropbox.com/s/6txwv3b4ng7pepe/us_census_2018_state_pop_est.csv?dl=1") %>%
separate(state, into = c("dot","state"), extra = "merge") %>%
select(-dot) %>%
mutate(state = str_to_lower(state))
states_map <- map_data("state")
covid_with_2018pop <-
covid19 %>%
mutate(state = str_to_lower(state),
weekday = wday(date, label = TRUE)) %>%
filter(weekday == "Fri") %>%
left_join(census_pop_est_2018,
by = c("state")) %>%
mutate(covid_per_10000 = (cases/est_pop_2018)*10000) %>%
ggplot(aes(fill = covid_per_10000,
group = date)) +
geom_map(map = states_map,
aes(map_id = state)) +
scale_fill_viridis_c() +
expand_limits(x = states_map$long, y = states_map$lat) +
labs(title = "Most recent COVID cases per 10,000 people in US",
subtitle = "Date: {closest_state}",
fill = "Cases per 10,000 people") +
transition_states(date, transition_length = 0) +
theme_map() +
theme(legend.background = element_blank(),
legend.position = "left",
plot.title = element_text(face = "bold", hjust = 0.5))
animate(covid_with_2018pop,
nframes = 200,
end_pause = 10)
anim_save("covid_state_2018pop.gif")
knitr::include_graphics("covid_state_2018pop.gif")
\(~\)
shiny app (for next week!)NOT DUE THIS WEEK! If any of you want to work ahead, this will be on next week’s exercises.
app.R file you create. Below, you will post a link to the app that you publish on shinyapps.io. You will create an app to compare states’ cumulative number of COVID cases over time. The x-axis will be number of days since 20+ cases and the y-axis will be cumulative cases on the log scale (scale_y_log10()). We use number of days since 20+ cases on the x-axis so we can make better comparisons of the curve trajectories. You will have an input box where the user can choose which states to compare (selectInput()) and have a submit button to click once the user has chosen all states they’re interested in comparing. The graph should display a different line for each state, with labels either on the graph or in a legend. Color can be used if needed.